
source("preamble.R")

IMG_FILE_EXT = ".pdf"

# Detection Performance (According to Respondent) -------------------------
message("Detection Performance (According to Respondent)")

## Accuracy (% of detections/nondetections where respondent agrees)
acc.self <- data.frame(
  issue.acc = with(
    treat_conf, 
    mean(issue_detect_correct, na.rm = T)
  ),
  # econd_tone.acc = with(
  #   treat_conf,
  #   mean(econd_tone_detect_correct, na.rm = T)
  # ),
  # excluding when we just guess negative tone
  econd_tone.acc = with(
    treat_conf, 
    mean(econd_tone_detect_correct[econd_tone_1_targets_detected_n == 1], na.rm = T)
  ),
  econd_reason.acc = with(
    treat_conf, 
    mean(econd_reason_detect_correct, na.rm = T)
  ),
  news.acc = with(
    treat_conf, 
    mean(news_detect_correct, na.rm = T)
  ),
  occu.acc = with(
    treat_conf, 
    mean(occu_detect_correct, na.rm = T)
  )
)

## Precision (% of detections where respondent confirms yes)
precision.self <- data.frame(
  issue.prec = with(
    treat_conf,
    mean(issue_detect_correct[!(issue_to_confirm == na_)], na.rm = T)
  ),
  econd_tone.prec = with(
    treat_conf,
    mean(econd_tone_detect_correct[econd_tone_to_confirm %in% sent_], na.rm = T)
  ),
  # # excluding when we just guess negative tone 
  # econd_tone.prec = with(
  #   treat_conf,
  #   mean(econd_tone_detect_correct[econd_tone_to_confirm %in% sent_ & 
  #                                    econd_tone_1_targets_detected_n == 1], na.rm = T)
  # ),
  econd_reason.prec = with(
    treat_conf,
    mean(econd_reason_detect_correct[!(econd_reason_to_confirm == na_)], na.rm = T)
  ),
  news.prec = with(
    treat_conf,
    mean(news_detect_correct[!(news_to_confirm == na_)], na.rm = T)
  ),
  occu.prec = with(
    treat_conf,
    mean(occu_detect_correct[!(occu_to_confirm == na_)], na.rm = T)
  )
)

## Recall (% of category incidences where detection occurs)
recall.self <- data.frame(
  issue.recall = with(
    treat_conf,
    mean(issue_detect_correct[!(issue_confirm_other %in% na_)], na.rm = T)
  ),
  econd_tone.recall = with(
    treat_conf,
    mean(econd_tone_detect_correct[econd_tone_confirm_correct %in% sent_], na.rm = T)
  ),
  econd_reason.recall = with(
    treat_conf,
    mean(econd_reason_detect_correct[!(econd_reason_confirm_other %in% na_)], na.rm = T)
  ),
  news.recall = with(
    treat_conf,
    mean(news_detect_correct[!(news_confirm_other %in% na_)], na.rm = T)
  ),
  occu.recall = with(
    treat_conf,
    mean(occu_detect_correct[!(occu_confirm_other %in% na_)], na.rm = T)
  )
)

bind_rows(
  acc.self %>%
    rename_all(~stringr::str_replace(.x, ".acc", "")) %>%
    mutate(metric="Accuracy", .before = everything()),
  precision.self %>%
    rename_all(~stringr::str_replace(.x, ".prec", "")) %>%
    mutate(metric="Precision", .before = everything()),
  recall.self %>%
    rename_all(~stringr::str_replace(.x, ".recall", "")) %>%
    mutate(metric="Recall", .before = everything())
)

# Detection Performance (According to Coders) -----------------------------
message("Detection Performance (According to Coders)")

## Accuracy
acc.coders <- data.frame(
  issue.acc = with(
    treat_conf, 
    mean(issue_to_confirm == issue_coded, na.rm=T)
  ),
  econd_tone.acc = with(
    treat_conf,
    mean(econd_tone_detect_correct, na.rm = T)
  ),
  # # excluding when we just guess negative tone
  # econd_tone.acc = with(
  #   treat_conf, 
  #   mean(econd_tone_detect_correct[econd_tone_1_targets_detected_n == 1], na.rm = T)
  # ),
  econd_reason.acc = with(
    treat_conf, 
    mean(econd_reason_to_confirm == econd_reason_coded, na.rm = T)
  ),
  news.acc = with(
    treat_conf, 
    mean(news_to_confirm == news_coded, na.rm = T)
  ),
  occu.acc = with(
    treat_conf, 
    mean(occu_to_confirm == occu_coded, na.rm = T)
  )
)

## Precision
precision.coders <- data.frame(
  issue.prec = with(
    treat_conf, 
    mean((issue_to_confirm == issue_coded)[issue_to_confirm != na_], na.rm=T)
  ),
  econd_tone.prec = with(
    treat_conf,
    mean(econd_tone_detect_correct, na.rm = T)
  ),
  # # excluding when we just guess negative tone
  # econd_tone.prec = with(
  #   treat_conf, 
  #   mean(econd_tone_detect_correct[econd_tone_1_targets_detected_n == 1], na.rm = T)
  # ),
  econd_reason.prec = with(
    treat_conf, 
    mean((econd_reason_to_confirm == econd_reason_coded)[econd_reason_to_confirm != na_], na.rm = T)
  ),
  news.prec = with(
    treat_conf, 
    mean((news_to_confirm == news_coded)[news_to_confirm != na_], na.rm = T)
  ),
  occu.prec = with(
    treat_conf, 
    mean((occu_to_confirm == occu_coded)[occu_to_confirm != na_], na.rm = T)
  )
)

## Recall
recall.coders <- data.frame(
  issue.recall = with(
    treat_conf, 
    mean((issue_to_confirm == issue_coded)[issue_coded != na_], na.rm=T)
  ),
  econd_tone.recall = with(
    treat_conf,
    mean(econd_tone_detect_correct, na.rm = T)
  ),
  # # excluding when we just guess negative tone
  # econd_tone.acc = with(
  #   treat_conf, 
  #   mean(econd_tone_detect_correct[econd_tone_1_targets_detected_n == 1], na.rm = T)
  # ),
  econd_reason.recall = with(
    treat_conf, 
    mean((econd_reason_to_confirm == econd_reason_coded)[econd_reason_coded != na_], na.rm = T)
  ),
  news.recall = with(
    treat_conf, 
    mean((news_to_confirm == news_coded)[news_coded != na_], na.rm = T)
  ),
  occu.recall = with(
    treat_conf, 
    mean((occu_to_confirm == occu_coded)[occu_coded != na_], na.rm = T)
  )
)

bind_rows(
  acc.coders %>%
    rename_all(~stringr::str_replace(.x, ".acc", "")) %>%
    mutate(metric="Accuracy", .before = everything()),
  precision.coders %>%
    rename_all(~stringr::str_replace(.x, ".prec", "")) %>%
    mutate(metric="Precision", .before = everything()),
  recall.coders %>%
    rename_all(~stringr::str_replace(.x, ".recall", "")) %>%
    mutate(metric="Recall", .before = everything())
)

# Evidence of Acquiescence Bias in Confirmation Probing -----------------------
message("Evidence of Acquiescence Bias in Confirmation Probing")

x <- "Coded in Conf.\nCondition OE (Majority)"
y <- "Confirmed by Respondent\nin Conf. Condition OE"

ggarrange(
  make_category_correlation_plot(cat_distr$issue, x, y) + 
    ggrepel::geom_text_repel(aes(label=cat)) +
    ggtitle("Most Imp. Issue"),
  make_category_correlation_plot(cat_distr$econd_reason, x, y) + 
    ggrepel::geom_text_repel(aes(label=cat), max.overlaps = 5) +
    ggtitle("Econ. Cond. (Reason)"),
  make_category_correlation_plot(cat_distr$news, x, y) + 
    ggrepel::geom_text_repel(aes(label=cat)) +
    ggtitle("Pref. News"),
  make_category_correlation_plot(cat_distr$occu, x, y) + 
    ggrepel::geom_text_repel(aes(label=cat),
                             box.padding = 0.75, max.overlaps = 5) +
    ggtitle("Main Occu.")
)
ggsave(here(paste0("figures/main_conf_vs_coded", IMG_FILE_EXT)),
       height = 12, width = 12)

# Comparison of Response Categories (Confirmed vs. Detected in Other Open-Ends) -----
message("Comparison of Response Categories (Confirmed vs. Detected in Other Open-Ends)")

ggarrange(
  make_category_correlation_plot(cat_distr$issue, 
                                 "Detected in Control Condition OE", 
                                 "Confirmed by Respondent\nin Conf. Condition OE") + 
    ggrepel::geom_text_repel(aes(label=cat)) +
    ggtitle("Most Imp. Issue"),
  make_category_correlation_plot(cat_distr$news, 
                                 "Detected in Elab/Rel. Condition OE", 
                                 "Confirmed by Respondent\nin Conf. Condition OE") + 
    ggrepel::geom_text_repel(aes(label=cat),
                             max.overlaps = 5) +
    ggtitle("Pref. News"),
  make_category_correlation_plot(cat_distr$occu, 
                                 "Detected in Elab/Rel. Condition OE",
                                 "Confirmed by Respondent\nin Conf. Condition OE") + 
    ggrepel::geom_text_repel(aes(label=cat),
                             box.padding = 1) +
    ggtitle("Main Occu.")
)
ggsave(here(paste0("figures/main_conf_vs_det_other", IMG_FILE_EXT)),
       height = 12, width = 12)

# Rates of Probes Triggered in Elab/Rel Treatment Condition ---------------
message("Rates of Probes Triggered in Elab/Rel Treatment Condition")

combined %>%
  filter(treatment == "Elab/Rel.") %>%
  select(respondent_id, matches("(1_response|probe_flag|dropout_b4)$"), -matches("frustrated")) %>%
  pivot_longer(-respondent_id, 
               # names_pattern = "(.*)_(elab|rel|response|dropout_b4|probe$)",
               names_pattern = "(.*)_(elab|rel|1_response|dropout_b4)", 
               names_to=c("exp",".value")) %>% 
  # group_by(respondent_id, exp) %>% filter(elab & rel) %>% as.data.frame()
  filter(!is.na(`1_response`), !dropout_b4) %>%
  mutate(`Probing Behavior` = case_when(
    elab & !rel ~ "Elaboration Probe\nTriggered",
    !elab & rel ~ "Relevance Probe\nTriggered",
    elab & rel ~ "Hybrid Probe\nTriggered",
    is.na(elab) & is.na(rel) ~ "No Probe\nTriggered",
    !elab & !rel ~ "Probe Error",
  ) %>% factor(.,
               levels = c(
                 "Elaboration Probe\nTriggered",
                 "Relevance Probe\nTriggered",
                 "Hybrid Probe\nTriggered",
                 "No Probe\nTriggered",
                 "Probe Error"
               )), exp = exp_ordered_labels_f(exp)) %>%
  tabyl(`Probing Behavior`, exp, show_missing_levels = FALSE) %>%
  adorn_percentages("col") %>%
  adorn_pct_formatting(digits = 0) %>%
  adorn_ns() %>%
  flextable() %>%
  set_caption(
    number_tab("Probing Behavior in Elab/Rel. Treatment Group")
  ) %>%
  set_table_properties(layout = "autofit", width = 1)

combined %>%
  filter(issue_elab_probe_flag==T & issue_rel_probe_flag==T) %>%
  select(issue_2_probe)

combined %>%
  filter(econd_elab_probe_flag==T & econd_rel_probe_flag==T) %>%
  select(econd_2_probe)

combined %>%
  filter(news_elab_probe_flag==T & news_rel_probe_flag==T) %>%
  select(news_2_probe)

combined %>%
  filter(occu_elab_probe_flag==T & occu_rel_probe_flag==T) %>%
  select(occu_2_probe)

# Examples of Elaboration/Relevance Probes --------------------------------
message("Examples of Elaboration/Relevance Probes")

set.seed(60603)

combined %>%
  filter(treatment == "Elab/Rel.") %>%
  select(respondent_id, matches("(1_response|2_probe|probe_flag)$"), -matches("frustrated")) %>%
  pivot_longer(-respondent_id, 
               # names_pattern = "(.*)_(elab|rel|response|dropout_b4|probe$)",
               names_pattern = "(.*)_(elab|rel|1_response|2_probe)", 
               names_to=c("exp",".value")) %>% 
  filter(!is.na(`1_response`)) %>%
  filter(!(elab & rel) & (elab | rel)) %>%
  mutate(`Probing Behavior` = case_when(
    elab & !rel ~ "Elaboration Probe\nTriggered",
    !elab & rel ~ "Relevance Probe\nTriggered",
    elab & rel ~ "Hybrid Probe\nTriggered",
    is.na(elab) & is.na(rel) ~ "No Probe\nTriggered",
    !elab & !rel ~ "Probe Error",
  ) %>% factor(.,
               levels = c(
                 "Elaboration Probe\nTriggered",
                 "Relevance Probe\nTriggered",
                 "Hybrid Probe\nTriggered",
                 "No Probe\nTriggered",
                 "Probe Error"
               )), exp = exp_ordered_labels_f(exp)) %>%
  select(exp, `Probing Behavior`, `Seed Response`=`1_response`, `Probe Text`=`2_probe`) %>%
  group_by(exp, `Probing Behavior`) %>%
  sample_n(1) %>%
  ungroup() %>%
  mutate(`Example` = paste0("[Seed Response]: ", `Seed Response`, "\n\n",
                            "[Probe Text]: ", stringr::str_to_sentence(`Probe Text`) %>%
                              str_replace_all(" i ", " I ") %>%
                              str_replace_all("i'm", "I'm") %>%
                              str_replace_all("i'd", "I'd") %>%
                              gsub("\\. (.)", ". \\U\\1", ., perl= TRUE))) %>%
  select(-`Seed Response`, -`Probe Text`) %>%
  pivot_wider(names_from = "exp", values_from = "Example") %>%
  flextable() %>%
  set_caption(
    number_tab("Examples of Probing Behavior and Responses")
  ) %>%
  set_table_properties(layout = "autofit", width = 1) %>%
  fontsize(size = 8, j = 2:5)

# Effects of Elab/Rel Probing on Coded Response Quality Criteria ----------
message("Effects of Elab/Rel Probing on Coded Response Quality Criteria")

coded_qual_pre_post_resolved_with_covariates <- bind_rows(
  coded_qual_resolved %>%
    mutate(after_probe = treat=="Elaboration", coding_round = 1) %>%
    left_join(combined %>% 
                select(-matches("(issue|occu|news|econ)"), matches("probe_flag")),
              by = "respondent_id"),
  coded_qual_pre_resolved %>%
    mutate(after_probe = FALSE, coding_round = 2) %>%
    left_join(combined %>% 
                select(-matches("(issue|occu|news|econ)"), matches("probe_flag")),
              by = "respondent_id")
)


coded_qual_all_results <- map_dfr(
  quality_criteria,
  .progress = TRUE,
  function(out) {
    reg_subsets <- list("Most Imp.\nIssue"="issue", 
                        "Econ. Cond.\n(Reason)"="econd",
                        "Pooled"=c("issue","econd"))
    lapply(1:length(reg_subsets), function(k) {
      # for some reason, NAs with `lm_robust()` (not enough power?), sticking to `lm()`
      bind_rows(
        # Diff-in-Means: no controls
        coded_qual_pre_post_resolved_with_covariates %>%
          rename_at(out, ~"y") %>%
          filter(q %in% reg_subsets[[k]]) %>%
          mutate(y = as.numeric(y)) %>%
          filter((treat == "Elaboration" & after_probe) | 
                   (treat == "Control" & !after_probe)) %>%
          lm(y ~ treat, data = .) %>%
          tidy() %>%
          mutate(type = "No Controls", design = "Diff-in-Means")
        ,
        # Diff-in-Means: control for covariates
        coded_qual_pre_post_resolved_with_covariates %>%
          rename_at(out, ~"y") %>%
          filter(q %in% reg_subsets[[k]]) %>%
          mutate(y = as.numeric(y)) %>%
          filter((treat == "Elaboration" & after_probe) | 
                   (treat == "Control" & !after_probe)) %>%
          filter(!is.na(after_probe)) %>%
          lm(y ~ treat + work_status + gender + hh_inc5 + educ5 + log(age+1) + device_type, data = .) %>%
          tidy() %>%
          mutate(type = "Controls", design = "Diff-in-Means")
        ,
        # Pre-Post: no controls
        coded_qual_pre_post_resolved_with_covariates %>%
          rename_at(out, ~"y") %>%
          filter(q %in% reg_subsets[[k]]) %>%
          mutate(y = as.numeric(y)) %>%
          filter(!is.na(after_probe)) %>%
          lm(y ~ after_probe, data = .) %>%
          tidy() %>%
          mutate(type = "No Controls", design = "Pre-Post")
        ,
        # Pre-Post: control for covariates
        coded_qual_pre_post_resolved_with_covariates %>%
          rename_at(out, ~"y") %>%
          filter(q %in% reg_subsets[[k]]) %>%
          mutate(y = as.numeric(y)) %>%
          filter(!is.na(after_probe)) %>%
          lm(y ~ after_probe + work_status + gender + hh_inc5 + educ5 + log(age+1) + device_type, data = .) %>%
          tidy() %>%
          mutate(type = "Controls", design = "Pre-Post")
        # ,
        # # Diff-in-Diff: no controls (not currently possible to estimate)
        # coded_qual_pre_post_resolved_with_covariates %>%
        #   rename_at(out, ~"y") %>%
        #   filter(q %in% reg_subsets[[k]]) %>%
        #   mutate(y = as.numeric(y)) %>%
        #   filter(!is.na(after_probe)) %>%
        #   lm(y ~ after_probe*treat, data = .) %>%
        #   tidy() %>%
        #   mutate(type = "No Controls", design = "Diff-in-Diff")
        # ,
        # # Diff-in-Diff: control for covariates (not currently possible to estimate)
        # coded_qual_pre_post_resolved_with_covariates %>%
        #   rename_at(out, ~"y") %>%
        #   filter(q %in% reg_subsets[[k]]) %>%
        #   mutate(y = as.numeric(y)) %>%
        #   filter(!is.na(after_probe)) %>%
        #   lm(y ~ after_probe*treat + work_status + gender + hh_inc5 + educ5 + log(age+1) + device_type, data = .) %>%
        #   tidy() %>%
        #   mutate(type = "Controls", design = "Diff-in-Diff")
      ) %>%
        mutate(outcome = out %>% 
                 stringr::str_replace_all("_", " ") %>%
                 stringr::str_to_title(),
               subset = names(reg_subsets)[[k]])
    })
  })


coded_qual_all_results %>%
  filter(design == "Diff-in-Means") %>%
  mutate(outcome = as_factor(outcome),
         subset = as_factor(subset),
         type = as_factor(type)) %>%
  adj_bhq(alpha = global_stat_sig_level) %>%
  filter(grepl("treat", term)) %>%
  ggplot(aes(x=type, y=estimate, ymin=conf.low, ymax=conf.high,
             group=type, shape=type, alpha=ifelse(stat.sig, stat_sig_label, stat_insig_label))) +
  geom_hline(yintercept = 0, lty = 2, alpha = 0.5) +
  geom_pointrange(size = 1, position = position_dodge(width=0.5)) +
  geom_text(aes(label = sprintf("%.1f%%", estimate*100) %>%
                  ifelse(abs(estimate) < 0.001, "≈0%", .) %>%
                  ifelse(estimate > 0 & abs(estimate) >= 0.001, paste0("+",.), .), 
                y = ifelse(estimate > 0.3, conf.low, conf.high),
                vjust = ifelse(estimate > 0.3, 1.5, -0.5)), 
            position = position_dodge(width=0.75), size=5) +
  labs(caption = str_wrap(paste("Note: Higher levels correspond to more positive evaluations.",
                                "Control covariates include work status, gender, education, age, and device type."), 150)) +
  scale_y_continuous(name = "OLS Estimate of Elab/Rel. Treatment\n(95% CI with BHq Correction)",
                     labels = scales::percent_format(1)) +
  scale_x_discrete(name = "Estimate", labels = \(.) gsub("Treatment Group","",.)) +
  scale_alpha_manual(values=c(1, 0.2), name = "P-value:") +
  scale_fill_identity(name = "P-value:") +
  scale_shape(name = "Specification:") +
  facet_grid(outcome ~ subset, scales="free_x") +
  theme_bw() +
  theme(strip.text = element_text(size=22, angle=0),
        strip.text.y = element_text(angle=0, hjust=0),
        legend.position = "bottom",
        legend.title = element_text(size=16),
        legend.text = element_text(size=14),
        plot.caption = element_text(lineheight = 1, size = 10, hjust = 0),
        panel.background = element_rect(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_text(size=18),
        axis.title = element_text(size=16, lineheight=1))

ggsave(here(paste0("figures/main_fx_qual_criteria_dim", IMG_FILE_EXT)), 
       height=12, width=12)

# Effects of Elab/Rel Probing on NLP-Based Informational Measures ---------
message("Effects of Elab/Rel Probing on NLP-Based Informational Measures")

nlp_measures_l <- nlp_measures %>%
  mutate(response = case_when(
    treatment == "Control" & grepl("econd_reason_1", response) ~ "econd_last_response",
    TRUE ~ response
  )) %>%
  mutate(exp = gsub("_(combined|last)_response", "", response) %>%
           exp_ordered_labels_f()) %>%
  pivot_longer(names_to = "measure",
               values_to = "val",
               unique_words:shannon_entropy)

nlp_measures_l_with_covariates <- nlp_measures_l %>%
  left_join(combined %>% 
              select(-matches("(issue|occu|news|econ)"), matches("probe_flag")),
            by = c("treatment","respondent_id"))

nlp_measure_names_main <- nlp_measure_names[1:5]

nlp_results <- map_dfr(
  1:length(names(nlp_measure_names_main)),
  .progress = TRUE,
  function(k) {
    message(nlp_measure_names_main[[k]])
    
    map_dfr(
      c("issue","econd","news","occu"),
      function(e) {
        message(" ", e)
        
        d_1 <- nlp_measures_l_with_covariates %>% 
          filter(!(grepl("(News|Occu)", exp) & grepl("Control", treatment))) %>%
          filter(!(grepl("(Issue|Econ)", exp) & grepl("Conf", treatment))) %>%
          filter(grepl("last_response", response) & !(grepl("why_frustrated", response))) %>%
          filter(measure == names(nlp_measure_names_main)[k] &
                   exp == exp_ordered_labels[[e]]) %>%
          rename(y = val) %>%
          filter(!is.na(y)) %>%
          mutate(y = (y - mean(y))/sd(y))
        
        d_2 <- nlp_measures_l_with_covariates %>% 
          filter(!(grepl("(News|Occu)", exp) & grepl("Control", treatment))) %>%
          filter(!(grepl("(Issue|Econ)", exp) & grepl("Conf", treatment))) %>%
          mutate(response = case_when(
            treatment == "Control" & grepl("econd_last", response) ~ "econd_combined_response",
            TRUE ~ response
          )) %>%
          filter(grepl("combined_response", response) & !(grepl("why_frustrated", response))) %>%
          filter(measure == names(nlp_measure_names_main)[k] &
                   exp == exp_ordered_labels[[e]]) %>%
          rename(y = val) %>%
          filter(!is.na(y)) %>%
          mutate(y = (y - mean(y))/sd(y))
        
        if (e == "occu") 
          f <- y ~ treatment + hh_inc5 +gender + educ5 + log(age+1) + device_type
        else
          f <- y ~ treatment + work_status + hh_inc5 +gender + educ5 + log(age+1) + device_type
        
        tryCatch({
          bind_rows(
            d_1 %>%
              lm(y ~ treatment, data = .) %>%
              tidy(conf.level = 0.95, conf.int = TRUE) %>%
              mutate(outcome = "Post Probe", specif = "No Controls"),
            d_2 %>%
              lm(y ~ treatment, data = .) %>%
              tidy(conf.level = 0.95, conf.int = TRUE) %>%
              mutate(outcome = "Combined", specif = "No Controls"),
            d_1 %>%
              lm(f, data = .) %>%
              tidy(conf.level = 0.95, conf.int = TRUE) %>%
              mutate(outcome = "Post Probe", specif = "Controls"),
            d_2 %>%
              lm(f, data = .) %>%
              tidy(conf.level = 0.95, conf.int = TRUE) %>%
              mutate(outcome = "Combined", specif = "Controls")
          ) %>%
            mutate(measure = nlp_measure_names_main[[k]] %>% 
                     stringr::str_wrap(width = 10),
                   exp = e)
        }, error = function(err) {
          browser()
        })
      })
  })

nlp_results %>%
  mutate(exp = exp_ordered_labels_f(exp),
         specif = as_factor(specif),
         outcome = factor(outcome,
                          levels = c(
                            "Post Probe",
                            "Combined"
                          ))) %>%
  mutate(stat.sig = p.value < global_stat_sig_level) %>%
  filter(grepl("treatment", term)) %>%
  adj_bhq(alpha = global_stat_sig_level) %>%
  ggplot(aes(x=outcome, 
             y=estimate, 
             group=specif,
             shape=specif,
             ymin=conf.low, ymax=conf.high,
             # group=type, shape=type,
             alpha=ifelse(stat.sig, stat_sig_label, stat_insig_label))) +
  scale_y_continuous(name = "OLS Estimate of Elab/Rel Group Membership on NLP Measure\n(95% CI with BHq Correction)") +
  scale_x_discrete(name = "Specific Outcome for Elab/Rel. Group", 
                   labels = \(.) gsub("treatment","",.)) +
  labs(caption = "Note: Outcomes are standardized via z-transformation.") +
  geom_hline(yintercept = 0, lty = 2, alpha = 0.5) +
  geom_pointrange(size = 0.75, position = position_dodge(1)) +
  geom_text(aes(label = sprintf("%s%.1f", 
                                ifelse(estimate >=0, "+", "-"),
                                abs(estimate)), 
                y = case_when(estimate > 0.3 ~ conf.low,
                              estimate <= 0.3 ~ conf.high),
                vjust = case_when(estimate > 0.3 ~ 1.5, 
                                  estimate <= 0.3 ~ -0.5)), 
            position = position_dodge(width=1), 
            size=3.5, fontface = "bold") +
  scale_shape(name = "Specification:") +
  facet_grid(measure ~ exp, scales = "free") +
  scale_alpha_manual(values=c(1, 0.2), name = "P-value:") +
  # scale_fill_identity(name = "Effect:") +
  scale_color_identity(name = "Effect:") +
  # scale_shape(name = "Outcome\nMeasure:") +
  theme_bw() + 
  theme(strip.text = element_text(size=17, lineheight=1),
        legend.position = "bottom",
        legend.title = element_text(size=12),
        legend.text = element_text(size=12),
        plot.caption = element_text(lineheight = 1, size = 10, hjust = 0),
        panel.background = element_rect(),
        strip.text.y = element_text(size=18, angle=0, hjust=0),
        axis.text.x = element_text(size=16, angle=40, hjust=1),
        axis.text = element_text(size=18),
        axis.title = element_text(size=16, lineheight=1))

ggsave(here(paste0("figures/main_fx_nlp_measures", IMG_FILE_EXT)), 
       height=12, width=12)

# Effects of Probing on Respondent Attrition ------------------------------
message("Effects of Probing on Respondent Attrition")

nr_types <- list("Dropout\n(Right After)"="_dropout_rightafter",
                 "Dropout\n(Any Time After)"="_dropout_after")

nr_results <- map_dfr(
  1:length(nr_types),
  .progress = TRUE,
  function(k) {
    map_dfr(
      c("issue","econd_reason","news","occu"),
      function(exp) {
        d <- combined %>%
          rename_at(paste0(exp, nr_types[[k]]), ~"y")
        
        if (exp == "occu") 
          f <- y ~ treatment + hh_inc5 + gender + educ5 + age5 + device_type
        else
          f <- y ~ treatment + work_status + hh_inc5 + gender + educ5 + age5 + device_type
        
        bind_rows(
          d %>%
            lm(y ~ treatment, data = .) %>%
            tidy() %>%
            mutate(type = "No Controls")
          ,
          d %>%
            lm(f, data = .) %>%
            tidy() %>%
            mutate(type = "Controls")
        ) %>%
          mutate(nr_type = names(nr_types)[k],
                 exp = exp)
      })
  }) 

nr_results %>%
  mutate(nr_type = as_factor(nr_type),
         type = as_factor(type),
         exp = exp_ordered_labels_f(exp)) %>%
  adj_bhq(alpha = global_stat_sig_level) %>%
  filter(grepl("treatment", term)) %>%
  ggplot(aes(x=term, y=estimate, ymin=conf.low, ymax=conf.high,
             group=type, shape=type, 
             alpha=ifelse(stat.sig %in% c(NA,T), stat_sig_label, stat_insig_label))) +
  labs(caption = str_wrap(paste("Note: Higher levels correspond to higher rates of attrition.",
                                "Control covariates include work status, gender, education, age, and device type."
                                ), 150)) +
  scale_y_continuous(name = "OLS Estimate of Treatment Group Membership on Dropout\n(95% CI with BHq Correction)",
                     labels = scales::percent_format(1)) +
  scale_x_discrete(name = "Treatment (Relative to Control Group)", 
                   labels = \(.) gsub("treatment","",.)) +
  geom_hline(yintercept = 0, lty = 2, alpha = 0.5) +
  geom_pointrange(size = 1, position = position_dodge(width=0.5)) +
  geom_text(aes(label = sprintf("%.1f%%", estimate*100) %>%
                  ifelse(abs(estimate) < 0.001, "≈0%", .) %>%
                  ifelse(estimate > 0 & abs(estimate) >= 0.001, paste0("+",.), .),
                y = ifelse(estimate > 0, conf.low, conf.high),
                vjust = ifelse(estimate > 0, 2, -1)),
            position = position_dodge(width=0.75), size=4) +
  facet_grid(nr_type ~ exp) +
  scale_alpha_manual(values=c(1, 0.2), name = "P-value:") +
  scale_fill_identity(name = "P-value:") +
  scale_shape(name = "Specification:") +
  theme_bw() + 
  theme(strip.text = element_text(size=17),
        legend.position = "bottom",
        legend.title = element_text(size=16),
        legend.text = element_text(size=14),
        plot.caption = element_text(lineheight = 1, size = 10, hjust = 0),
        panel.background = element_rect(),
        strip.text.y = element_text(size=18, lineheight=1),
        axis.text.x = element_text(size=16, angle=40, hjust=1),
        axis.text.y = element_text(size=18),
        axis.title = element_text(size=16, lineheight=1))

ggsave(here(paste0("figures/main_fx_attrition", IMG_FILE_EXT)), 
       height=8, width=12)

# Effects of Probing on Self-Reported User Experience ---------------------
message("Effects of Probing on Self-Reported User Experience")

map_dfr(c("quality","ease","frustration","satisfaction"),
        .progress = TRUE,
        function(out) {
          bind_rows(
            # just diff-in-means
            combined %>%
              rename_at(out, ~"y") %>%
              mutate(y = as.numeric(y)) %>%
              mutate(y = y/max(y, na.rm=T)) %>%
              lm_robust(y ~ treatment, data = .) %>%
              tidy() %>%
              mutate(type = "No Controls")
            ,
            # control for covariates
            combined %>%
              rename_at(out, ~"y") %>%
              mutate(y = as.numeric(y)) %>%
              mutate(y = y/max(y, na.rm=T)) %>%
              lm_robust(y ~ treatment + work_status + gender + hh_inc5 + educ5 + log(age+1) + device_type, data = .) %>%
              tidy() %>%
              mutate(type = "Controls")
          ) %>%
            mutate(outcome = out %>% 
                     stringr::str_replace_all("_", " ") %>%
                     stringr::str_to_title())
        }) %>%
  mutate(outcome = as_factor(outcome),
         type = as_factor(type)) %>%
  adj_bhq(alpha = global_stat_sig_level) %>%
  filter(grepl("treat", term)) %>%
  ggplot(aes(x=term, y=estimate, ymin=conf.low, ymax=conf.high,
             group=type, shape=type, alpha=ifelse(stat.sig, stat_sig_label, stat_insig_label))) +
  geom_hline(yintercept = 0, lty = 2) +
  coord_cartesian(clip = "off") +
  geom_pointrange(size = 1, position = position_dodge(width=0.5)) +
  geom_text(aes(label = sprintf("%.2f", estimate), 
                y = ifelse(estimate > 0 | outcome == "Quality", 
                           conf.low, 
                           conf.high),
                vjust = ifelse(estimate > 0 | outcome == "Quality", 
                               1.5, 
                               -1.25)), 
            position = position_dodge(width=0.5), size=5) +
  labs(caption = str_wrap(paste("Note: Higher levels correspond to more positive evaluations.",
                                "Control covariates include work status, gender, education, age, and device type.",
                                "Outcomes are all normalized to the [0-1] range."), 150)) +
  scale_y_continuous(name = "OLS Coefficient (95% CI with Robust S.E. and BHq Correction)",
                     limits = c(-0.06, 0.06),
                     breaks = c(-0.06, -0.03, 0, 0.03, 0.06)) +
  scale_x_discrete(name = "Treatment (Relative to Control Group)", labels = \(.) gsub("treatment","",.)) +
  scale_alpha_manual(values=c(1, 0.2), name = "P-value:") +
  coord_cartesian(clip = "off") +
  scale_fill_identity(name = "P-value:") +
  scale_shape(name = "Specification:") +
  facet_wrap(~ outcome) +
  theme_bw() +
  theme(strip.text = element_text(size=22, lineheight=1),
        legend.position = "bottom",
        legend.title = element_text(size=12),
        legend.text = element_text(size=12),
        plot.caption = element_text(lineheight = 1, size = 10, hjust = 0),
        panel.background = element_rect(),
        axis.text = element_text(size=18),
        axis.title = element_text(size=16, lineheight=1))

ggsave(here(paste0("figures/main_fx_user_experience", IMG_FILE_EXT)), 
       height=8, width=12)
